Source codes: https://github.com/Anne-LiseGerard/dftd_rnaseq
suppressPackageStartupMessages({
library("VennDiagram")
library("ggplot2")
library("mitch")
library("dplyr")
library("kableExtra")
library("limma")
library("usefun")
library("viridis")
library("ggh4x")
library("tidyr")
})
knitr::opts_chunk$set(dev = 'svg')
Goal: compare expression profiles of DFT1 vs DFT2 tumour biopsies and cell lines.Two contrasts: DFT1 vs DFT2, then compare that first contrast in cell lines vs biopsies.
display_venn <- function(x, ...){
library(VennDiagram)
grid.newpage()
venn_object <- venn.diagram(x, filename = NULL, ...,
height = 480 , width = 480 ,
resolution = 300,
lwd = 2,
col=c("#B3B9DF", "#DDAFB7"),
fill = c(alpha("#B3B9DF",0.3), alpha("#DDAFB7",0.3)),
cex = 1,
fontfamily = "sans",
cat.cex = 0.9,
cat.default.pos = "outer",
cat.dist = c(0.05, 0.05),
cat.pos = c(-25,25),
cat.fontfamily = "sans",
cat.col = c("#B3B9DF", "#DDAFB7"),
main.cex = 1,
main.fontfamily = "sans",
sub.cex = 0.8,
sub.fontfamily = "sans",
ext.text=FALSE)
grid.draw(venn_object)
}
# get rid of log files
futile.logger::flog.threshold(futile.logger::ERROR, name = "VennDiagramLogger")
## NULL
Import lists of genes for Venn diagrams , DESEq2 outputs and genesets.
clines <- readRDS("~/dftd_RNAseq_annelise/dge/venn_clines.rds") # list genes cell lines
biopsies <- readRDS("~/dftd_RNAseq_annelise/dge/venn_biopsies.rds") # list genes biopsies
dge_clines <- readRDS("~/dftd_RNAseq_annelise/dge/dge_clines.rds") # DESeq2 cell lines
dge_biopsies <- readRDS("~/dftd_RNAseq_annelise/dge/dge_biopsies.rds") # DESeq2 biopsies
gt <- readRDS("~/dftd_RNAseq_annelise/dge/gt.rds") # homology
genesets <- gmt_import("~/dftd_RNAseq_annelise/ref/ReactomePathways_2023-07-14.gmt") # reactome
genesets2 <- gmt_import("~/dftd_RNAseq_annelise/ref/c2.cp.kegg.v2023.1.Hs.symbols.gmt.txt") # kegg
# MDS data
y_clines <- readRDS("~/dftd_RNAseq_annelise/dge/y_clines.rds")
y_biopsies <- readRDS("~/dftd_RNAseq_annelise/dge/y_biopsies.rds")
#MDS=PCoA
ss_clines <- read.table("/mnt/data/annelise/dftd_RNAseq_annelise/ss.tsv",sep="\t",fill=TRUE,header=TRUE)
ss_biopsies <- read.table("/mnt/data/annelise/dftd_RNAseq_annelise/ss_patchett.txt",sep="\t",fill=TRUE,header=TRUE)
ss <- data.frame(run=c(ss_clines$C, ss_biopsies$run_accession),
ID=c(ss_clines$ClientID, ss_biopsies$sample_id),
DFT=c(ss_clines$DFT, ss_biopsies$DFT),
replicate=(c(ss_clines$Replicate, c(1,1,2,2,1,1,2,2,2,2,1,1,1,2))),
sample_type=c(rep("cell_line",19), ss_biopsies$sample_type))
ss <- na.omit(ss)
ss$DFT <- as.factor(ss$DFT)
ss %>%
kbl(caption="Sample sheet for all samples") %>%
kable_paper("hover", full_width = F)
| run | ID | DFT | replicate | sample_type | |
|---|---|---|---|---|---|
| 1 | 6180766 | 4906_1-1 | DFT1 | 1 | cell_line |
| 2 | 6180767 | C5065_1-1 | DFT1 | 1 | cell_line |
| 3 | 6180768 | 1426_1-1 | DFT1 | 1 | cell_line |
| 4 | 6180769 | RV_1-1 | DFT2 | 1 | cell_line |
| 5 | 6180770 | SN_1-1 | DFT2 | 1 | cell_line |
| 6 | 6180771 | TD549_1-1 | DFT2 | 1 | cell_line |
| 7 | 6180772 | 4906_2-1 | DFT1 | 2 | cell_line |
| 8 | 6180773 | C5065_2-1 | DFT1 | 2 | cell_line |
| 9 | 6180774 | 1426_2-1 | DFT1 | 2 | cell_line |
| 10 | 6180775 | RV_2-1 | DFT2 | 2 | cell_line |
| 11 | 6180776 | SN_2-1 | DFT2 | 2 | cell_line |
| 12 | 6180777 | TD549_2-1 | DFT2 | 2 | cell_line |
| 13 | 6180778 | 4906_3-1 | DFT1 | 3 | cell_line |
| 14 | 6180779 | C5065_3-1 | DFT1 | 3 | cell_line |
| 15 | 6180780 | 1426_3-1 | DFT1 | 3 | cell_line |
| 16 | 6180781 | RV_3-1 | DFT2 | 3 | cell_line |
| 17 | 6180782 | SN_3-1 | DFT2 | 3 | cell_line |
| 18 | 6180783 | TD549_3-1 | DFT2 | 3 | cell_line |
| 20 | ERR2804403 | sha_DFT1_1-1 | DFT1 | 1 | biopsy |
| 21 | ERR2804404 | sha_DFT1_1-2 | DFT1 | 1 | biopsy |
| 22 | ERR2804405 | sha_DFT1_2-1 | DFT1 | 2 | biopsy |
| 23 | ERR2804406 | sha_DFT1_2-2 | DFT1 | 2 | biopsy |
| 24 | ERR2804407 | sha_DFT2_1-1 | DFT2 | 1 | biopsy |
| 25 | ERR2804408 | sha_DFT2_1-2 | DFT2 | 1 | biopsy |
| 26 | ERR2804409 | sha_DFT2_2-1 | DFT2 | 2 | biopsy |
| 27 | ERR2804410 | sha_DFT2_2-2 | DFT2 | 2 | biopsy |
| 28 | ERR2852729 | sha_DFT2_RV_2-2 | DFT2 | 2 | cell_line |
| 29 | ERR2852728 | sha_DFT2_RV_2-1 | DFT2 | 2 | cell_line |
| 30 | ERR2852727 | sha_DFT2_RV_1-2 | DFT2 | 1 | cell_line |
| 31 | ERR2852726 | sha_DFT2_RV_1-1 | DFT2 | 1 | cell_line |
| 32 | SRR6266557 | C5065_UT_01 | DFT1 | 1 | cell_line |
| 33 | SRR6266556 | C5065_UT_02 | DFT1 | 2 | cell_line |
y <- cbind(y_clines, y_biopsies)
y <- y[,-c(27:32)] # remove Patchett cell lines
cs <- colSums(y)
cs <- cs[order(cs)]
par(mar=c(5,10,5,2))
barplot(cs,main="All samples",horiz=TRUE,las=1)
cols <- ss$DFT
cols <- gsub("DFT1","#B3B9DF",cols)
cols <- gsub("DFT2","#DDAFB7",cols)
pchs <- ss$sample_type
pchs <- gsub("cell_line",19,pchs)
pchs <- gsub("biopsy",17,pchs)
mymds <- plotMDS(y,plot=FALSE)
# fix the xlims
XMIN=min(mymds$x)
XMAX=max(mymds$x)
XMID=(XMAX+XMIN)/2
XMIN <- XMID + (XMIN-XMID)*1.1
XMAX <- XMID+(XMAX-XMID)*1.1
par(mar = c(5.1, 4.1, 4.1, 2.1) )
plotMDS(mymds,pch=as.numeric(pchs),cex=3,col=cols,main="Cell lines and Tumour biopsies",xlim=c(XMIN,XMAX))
text(mymds,labels=colnames(y))
mtext("blue=DFT1,pink=DFT2")
Some Venn diagrams to compare gene expression.
display_venn(x=list(clines[["allgenes"]], biopsies[["allgenes"]]),
category.names = c("Cell lines" , "Biopsies"),
main="All genes")
display_venn(x=list(clines[["sigdegs"]], biopsies[["sigdegs"]]),
category.names = c("Cell lines" , "Biopsies"),
main="DEGs",
sub="(padj < 0.05)")
display_venn(x=list(clines[["updegs"]], biopsies[["updegs"]]),
category.names = c("Cell lines" , "Biopsies"),
main="Up DEGs",
sub="(padj < 0.05; logFC > 0)")
display_venn(x=list(clines[["dndegs"]], biopsies[["dndegs"]]),
category.names = c("Cell lines" , "Biopsies"),
main="Down DEGs",
sub="(padj < 0.05; logFC < 0)")
# genes related to metabolism
display_venn(x=list(clines[["metreactgenes"]], biopsies[["metreactgenes"]]),
category.names = c("Cell lines" , "Biopsies"),
main="Metabolism genes - REACTOME")
display_venn(x=list(clines[["metsigreactdegs"]], biopsies[["metsigreactdegs"]]),
category.names = c("Cell lines" , "Biopsies"),
main="Metabolism DEGs - REACTOME",
sub="(padj < 0.05)")
display_venn(x=list(clines[["metsigreactupdegs"]], biopsies[["metsigreactupdegs"]]),
category.names = c("Cell lines" , "Biopsies"),
main="Metabolism up DEGs - REACTOME",
sub="(padj < 0.05; logFC > 0)")
display_venn(x=list(clines[["metsigreactdndegs"]], biopsies[["metsigreactdndegs"]]),
category.names = c("Cell lines" , "Biopsies"),
main="Metabolism down DEGs - REACTOME",
sub="(padj < 0.05; logFC < 0)")
Here we will perform a multi-contrast enrichment analysis (DFT1 vs DFT2 AND biopsies vs cell lines).
gsea_clines <- readRDS("~/dftd_RNAseq_annelise/dge/gsea_clines.rds")
gsea_biopsies <- readRDS("~/dftd_RNAseq_annelise/dge/gsea_biopsies.rds")
gsea_common <- intersect(gsea_clines$set,gsea_biopsies$set)
gsea_clines_only <- outersect(gsea_clines$set, gsea_common)
gsea_biopsies_only <- outersect(gsea_biopsies$set, gsea_common)
gsea_clines$dataset <- "Cell lines"
gsea_biopsies$dataset <- "Biopsies"
gsea_all <- rbind(gsea_clines,gsea_biopsies)
gsea_common_df <- filter(gsea_all, set %in% gsea_common)
ggplot(gsea_common_df, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustANOVA, size = setSize, shape=dataset)) +
geom_point(stat = 'identity') +
scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
xlab("s dist") + ylab("pathway") + ggtitle("Common") +
theme_bw()+
theme(legend.position = c(-1.8,0.8))
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
gsea_clines_m <- readRDS("~/dftd_RNAseq_annelise/dge/gsea_clines_metabo.rds")
gsea_biopsies_m <- readRDS("~/dftd_RNAseq_annelise/dge/gsea_biopsies_metabo.rds")
gsea_clines_m$dataset <- "Cell lines"
gsea_biopsies_m$dataset <- "Biopsies"
gsea_all_m <- rbind(gsea_clines_m,gsea_biopsies_m)
gsea_all_m$dataset <- factor(gsea_all_m$dataset, levels=c("Cell lines","Biopsies"))
ggplot(gsea_all_m, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustANOVA, size = setSize)) +
facet_grid(~dataset,scales = "free") +
geom_point(stat = 'identity') +
scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
xlab("s dist") + ylab("pathway") + ggtitle("Metabolism") +
theme_bw()
gsea_common_df$plot <- "Common pathways"
gsea_all_m$plot <- "Metabolic pathways"
gsea_plots <- rbind(gsea_all_m,gsea_common_df)
gsea_plots$plot <- factor(gsea_plots$plot, levels=c("Metabolic pathways","Common pathways"))
rownames(dge_clines) <- gsub("\\..*","", rownames(dge_clines))
x <- list("Cell lines"=dge_clines, "Biopsies"=dge_biopsies)
y <- mitch_import(x, "deseq2", geneTable = gt)
## Note: Mean no. genes in input = 16699.5
## Note: no. genes in output = 12301
## Note: estimated proportion of input genes in output = 0.737
genesets <- readRDS(file = "~/dftd_RNAseq_annelise/ref/gsea/reactome_plus_custom.RDS")
# REACTOME
#The pathway 'Response to metal ions' causes a bug as the genenames appear to map to the same human gene.
#I think the duplicate data is causing problems.
setnames <-c( "Response to metal ions", "Metallothioneins bind metals")
genesets <- genesets[! names(genesets) %in% setnames]
res_react <- mitch_calc(y, genesets, resrows = 100)
## Note: When prioritising by significance (ie: small
## p-values), large effect sizes might be missed.
if ( !file.exists("mitch_comp_react.html") ) {
mitch_report(res_react, "mitch_comp_react.html")
}
#mitch_plots(res_react, "mitch_comp_react.pdf")
gsea_signif <- filter(res_react$enrichment_result, p.adjustMANOVA < 0.05)
react_metabosets <- readRDS("~/dftd_RNAseq_annelise/ref/reactome_metabo.rds")
metabo_gsea <- filter(gsea_signif, set %in% names(react_metabosets))
metabo_gsea <- gather(metabo_gsea, "dataset","s.dist", 4:5)
metabo_gsea[metabo_gsea == "s.Cell.lines"] <- "Cell lines"
metabo_gsea[metabo_gsea == "s.Biopsies"] <- "Biopsies"
ggplot(metabo_gsea, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) +
#facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
geom_point(stat = 'identity') +
scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
xlab("s dist") + ylab("pathway") + ggtitle("Metabolism") +
theme_bw()
gsea_upboth <- filter(gsea_signif, s.Cell.lines > 0 & s.Biopsies > 0)
gsea_upboth <- gather(gsea_upboth, "dataset","s.dist", 4:5)
gsea_upboth[gsea_upboth == "s.Cell.lines"] <- "Cell lines"
gsea_upboth[gsea_upboth == "s.Biopsies"] <- "Biopsies"
gsea_downboth <- filter(gsea_signif, s.Cell.lines < 0 & s.Biopsies < 0)
gsea_downboth <- gather(gsea_downboth, "dataset","s.dist", 4:5)
gsea_downboth[gsea_downboth == "s.Cell.lines"] <- "Cell lines"
gsea_downboth[gsea_downboth == "s.Biopsies"] <- "Biopsies"
gsea_upcelllines_downbiopsies <- filter(gsea_signif, s.Cell.lines > 0 & s.Biopsies < 0)
gsea_upcelllines_downbiopsies <- gather(gsea_upcelllines_downbiopsies, "dataset","s.dist", 4:5)
gsea_upcelllines_downbiopsies[gsea_upcelllines_downbiopsies == "s.Cell.lines"] <- "Cell lines"
gsea_upcelllines_downbiopsies[gsea_upcelllines_downbiopsies == "s.Biopsies"] <- "Biopsies"
gsea_upbiopsies_downcelllines <- filter(gsea_signif, s.Cell.lines < 0 & s.Biopsies > 0)
gsea_upbiopsies_downcelllines <- gather(gsea_upbiopsies_downcelllines, "dataset","s.dist", 4:5)
gsea_upbiopsies_downcelllines[gsea_upbiopsies_downcelllines == "s.Cell.lines"] <- "Cell lines"
gsea_upbiopsies_downcelllines[gsea_upbiopsies_downcelllines == "s.Biopsies"] <- "Biopsies"
ggplot(gsea_upboth, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) +
#facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
geom_point(stat = 'identity') +
scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
xlab("s dist") + ylab("pathway") + ggtitle("Concordant upregulation") +
theme_bw()
ggplot(gsea_downboth, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) +
#facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
geom_point(stat = 'identity') +
scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
xlab("s dist") + ylab("pathway") + ggtitle("Concordant downregulation") +
theme_bw()
ggplot(gsea_upcelllines_downbiopsies, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) +
#facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
geom_point(stat = 'identity') +
scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
xlab("s dist") + ylab("pathway") + ggtitle("Upregulation in cell lines \nDown regulation in biopsies") +
theme_bw()
ggplot(gsea_upbiopsies_downcelllines, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) +
#facet_wrap(~dataset, ncol=1, nrow=2, scales = "free") +
geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
geom_point(stat = 'identity') +
scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
xlab("s dist") + ylab("pathway") + ggtitle("Downregulation in cell lines \nUp regulation in biopsies") +
theme_bw()
gsea_upboth$plot <- "Concordant positive enrichment"
gsea_upbiopsies_downcelllines$plot <- "Negative enrichment in cell lines \nPositive enrichment in biopsies"
gsea_upcelllines_downbiopsies$plot <- "Positive enrichment in cell lines \nNegative enrichment in biopsies"
gsea_downboth$plot <- "Concordant negative enrichment"
metabo_gsea$plot <- "Metabolism"
gsea_x <- rbind(gsea_upboth,gsea_upbiopsies_downcelllines, gsea_upcelllines_downbiopsies, gsea_downboth, metabo_gsea)
library(ggforce)
ggplot(gsea_x, aes(x = s.dist, y = reorder(set, s.dist), color = p.adjustMANOVA, size = setSize, shape=dataset)) +
facet_wrap(~plot, scales = "free", ncol=1, nrow=5) +
scale_size_continuous(breaks=c(10, 50, 100, 500, 1000))+
force_panelsizes(rows = c(3.5, 71.5, 11, 7, 2))+
geom_vline(xintercept = 0, colour="gray", linetype = "longdash")+
geom_point(stat = 'identity') +
scale_color_viridis(option="mako", direction=1,limits = c(0, 0.05)) +
xlab("s dist") + ylab("") + ggtitle("") +
theme_bw()
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggforce_0.4.2 tidyr_1.3.1 ggh4x_0.2.8
## [4] viridis_0.6.5 viridisLite_0.4.2 usefun_0.5.0
## [7] limma_3.60.4 kableExtra_1.4.0 dplyr_1.1.4
## [10] mitch_1.16.0 ggplot2_3.5.1 VennDiagram_1.7.3
## [13] futile.logger_1.4.3
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.5 beeswarm_0.4.0 xfun_0.49
## [4] bslib_0.8.0 caTools_1.18.3 htmlwidgets_1.6.4
## [7] GGally_2.2.1 bitops_1.0-8 vctrs_0.6.5
## [10] tools_4.4.2 generics_0.1.3 parallel_4.4.2
## [13] tibble_3.2.1 fansi_1.0.6 highr_0.11
## [16] pkgconfig_2.0.3 KernSmooth_2.23-24 RColorBrewer_1.1-3
## [19] lifecycle_1.0.4 farver_2.1.2 compiler_4.4.2
## [22] stringr_1.5.1 gplots_3.1.3.1 statmod_1.5.0
## [25] munsell_0.5.1 httpuv_1.6.15 PRROC_1.3.1
## [28] htmltools_0.5.8.1 sass_0.4.9 yaml_2.3.10
## [31] crayon_1.5.3 later_1.3.2 pillar_1.9.0
## [34] jquerylib_0.1.4 MASS_7.3-61 cachem_1.1.0
## [37] mime_0.12 ggstats_0.6.0 gtools_3.9.5
## [40] tidyselect_1.2.1 digest_0.6.37 stringi_1.8.4
## [43] reshape2_1.4.4 purrr_1.0.2 labeling_0.4.3
## [46] polyclip_1.10-7 fastmap_1.2.0 colorspace_2.1-1
## [49] cli_3.6.3 magrittr_2.0.3 utf8_1.2.4
## [52] withr_3.0.1 scales_1.3.0 promises_1.3.0
## [55] rmarkdown_2.28 lambda.r_1.2.4 gridExtra_2.3
## [58] shiny_1.9.1 evaluate_0.24.0 knitr_1.48
## [61] rlang_1.1.4 futile.options_1.0.1 Rcpp_1.0.13
## [64] xtable_1.8-4 glue_1.7.0 tweenr_2.0.3
## [67] echarts4r_0.4.5 formatR_1.14 xml2_1.3.6
## [70] svglite_2.1.3 rstudioapi_0.16.0 jsonlite_1.8.8
## [73] R6_2.5.1 plyr_1.8.9 systemfonts_1.1.0